<?php
/**
 * @Author: pan
 * @Date:   2020-10-30 16:46:46
 * @Content: a small web scraper ("crawler") written with PHP cURL
 * @Last Modified by:   pan
 * @Last Modified time: 2020-11-01 12:53:09
 */
/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url        URL to request.
 * @param bool   $proxy_flag When true, the request is routed through $proxy.
 * @param array  $proxy      Proxy settings: 'host' (required when $proxy_flag
 *                           is true), optional 'port', and optional 'name' /
 *                           'pass' credentials.
 *
 * @return string|false Response body (headers excluded), or false when the
 *                      handle cannot be created, the proxy settings have no
 *                      host, or the transfer fails.
 */
function setCurl($url,$proxy_flag=false,$proxy=[]){
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        // Hand the body back as a string instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        // 0 sets no explicit connect timeout of its own; the overall
        // CURLOPT_TIMEOUT below still bounds the whole request.
        CURLOPT_CONNECTTIMEOUT => 0,
        // Do not include response headers in the returned string.
        CURLOPT_HEADER         => false,
        // Abort the whole transfer after 120 seconds.
        CURLOPT_TIMEOUT        => 120,
    ]);

    // URL schemes are case-insensitive, so "HTTPS://..." must be treated
    // the same as "https://...".
    if (strncasecmp($url, 'https', 5) === 0) {
        // NOTE(review): certificate and host-name verification are disabled,
        // exactly as before, which leaves the request open to
        // man-in-the-middle attacks. Kept for backward compatibility;
        // enable both once a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    }

    if ($proxy_flag) {
        if (!isset($proxy['host']) || $proxy['host'] === '') {
            // The caller asked for a proxy; silently going direct would be
            // worse than failing, so report it and bail out.
            trigger_error('setCurl(): proxy requested but no proxy host given', E_USER_WARNING);
            curl_close($ch);
            return false;
        }
        curl_setopt($ch, CURLOPT_PROXY, $proxy['host']);

        if (isset($proxy['port']) && $proxy['port'] !== '') {
            curl_setopt($ch, CURLOPT_PROXYPORT, (int) $proxy['port']);
        }

        // Credentials go through their own option rather than being glued
        // into the host string, so an unauthenticated proxy no longer ends
        // up as ":@host".
        if (isset($proxy['name']) && $proxy['name'] !== '') {
            $password = isset($proxy['pass']) ? $proxy['pass'] : '';
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxy['name'] . ':' . $password);
        }
    }

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
/**
 * Remove every whitespace-like token from a string.
 *
 * Strips spaces, line feeds, carriage returns, tabs, the literal HTML entity
 * "&nbsp;" and the UTF-8 encoded non-breaking space (U+00A0).
 *
 * The non-breaking space is removed by its byte sequence instead of a /u
 * regular expression: a /u pattern makes preg_replace() return null when the
 * input is not valid UTF-8, which used to turn the whole page into null.
 *
 * @param string $str Text to compact.
 *
 * @return string The text with all of the tokens above removed.
 */
function trim_all($str){
    // Needles are applied in order; the NBSP bytes stay last so the result
    // for valid UTF-8 input is the same as the previous two-step removal.
    $needles = [' ', "\n", "\r", "\t", '&nbsp;', "\xC2\xA0"];

    return str_replace($needles, '', $str);
}


/**
  @demo
**/  
/**
 * Extract a few labelled fields from whitespace-stripped Baidu Baike HTML
 * and dump them.
 *
 * The patterns expect markup whose whitespace has already been removed
 * (note "<supclass" rather than "<sup class"). Each field is captured
 * non-greedily from its label up to the closing marker, then HTML tags are
 * stripped from the capture. A field whose pattern does not match is
 * reported as an empty string.
 *
 * @param string $baike_info Whitespace-stripped page HTML.
 *
 * @return array Map of field label => extracted text; the same array is
 *               also printed with var_dump().
 */
function handlebaikeinfo($baike_info){
    // Field label => pattern whose first capture group holds the raw value.
    $patterns = [
        '出品公司' => '/出品公司(.*)\<\/dd\>/U',
        '中文名' => '/中文名(.*)\<\/dd\>/U',
        '外文名' => '/外文名(.*)\<supclass/U',
    ];

    $baike_arr = [];
    foreach ($patterns as $field => $pattern) {
        // Only read the capture group when the pattern really matched;
        // otherwise $match[1] does not exist.
        $matched = preg_match($pattern, (string) $baike_info, $match) === 1;
        $baike_arr[$field] = $matched ? strip_tags($match[1]) : '';
    }

    var_dump($baike_arr);

    return $baike_arr;
}
// Demo entry point: download the Baidu Baike article, strip its whitespace
// and dump the extracted fields.
$url = 'https://baike.baidu.com/item/%E6%88%98%E7%8B%BC%E2%85%A1?fromtitle=%E6%88%98%E7%8B%BC2&fromid=17196087';

// Opening <pre> tag so the var_dump() output keeps its layout in a browser.
echo '<pre>';
handlebaikeinfo(
    trim_all(
        setCurl($url)
    )
);